package com.gitee.Transformation

import org.apache.flink.api.scala.ExecutionEnvironment

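/*
  Both reduce and reduceGroup must be applied after the data set has been grouped.

  NOTE(review): this note originally claimed that reduce pulls (shuffles) the data
  first and then merges, whereas reduceGroup merges first and then pulls, so that
  reduceGroup performs better. Flink's DataSet documentation describes the
  opposite: a grouped reduce is implicitly combinable (pre-aggregated before the
  shuffle), while a group-reduce is only combined when its function also
  implements GroupCombineFunction. Verify before relying on the performance claim.
 */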
/**
 * Small demo of `reduceGroup` on a grouped Flink `DataSet`.
 *
 * Builds an in-memory data set of `(word, count)` pairs, groups it by the word
 * and sums the counts of every group, then prints the per-word totals.
 */
object ReduceGroupTest {

  /**
   * Entry point: runs the word-count style aggregation and prints the result.
   *
   * For the hard-coded input the totals are `(java,2)` and `(jva,1)`.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    val env: ExecutionEnvironment = ExecutionEnvironment.getExecutionEnvironment
    // Brings the DataSet type and the implicit TypeInformation instances into scope.
    import org.apache.flink.api.scala._
    env.setParallelism(1)

    val wordCounts: DataSet[(String, Int)] =
      env.fromElements(("java", 1), ("jva", 1), ("java", 1))

    // Group on tuple field 0 (the word); each group is handed over as one iterator,
    // which is folded pairwise into a single (word, total) tuple.
    val totals: DataSet[(String, Int)] =
      wordCounts
        .groupBy(0)
        .reduceGroup(group => group.reduce((acc, next) => (acc._1, acc._2 + next._2)))

    totals.print()
  }

}
